it looks like there might be a difference between the high and low debt groups, the variance is decidedly higher in the high debt group.
d.both_completed %>%
ggplot(aes(x=sonarqube_issues, fill=high_debt_version)) +
geom_boxplot() +
labs(
title = "Number of issuess for the different debt levels",
x ="Number of issues"
) +
scale_y_continuous(breaks = NULL) +
scale_fill_manual(
name = "Debt level",
labels = c("High debt", "Low debt"),
values = c("#7070FF", "lightblue"),
guide = guide_legend(reverse = TRUE)
)
d.both_completed %>%
pull(sonarqube_issues) %>%
summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 1.000 1.659 3.000 12.000
sprintf("Variance: %.2f", var(pull(d.both_completed, sonarqube_issues)))
## [1] "Variance: 6.00"
Variable names are modeled using the negative binomial family rather than poisson since the variance is greater than the mean.
We include high_debt_verison as a predictor in our model as this variable represent the very effect we want to measure. We also include a varying intercept for each individual to prevent the model from learning too much from single participants with extreme measurements.
We iterate over the model until we have sane priors, in this case a prior that reasonably cna fit our data without being too restrictive.
sonarqube_issues.with <- extendable_model(
base_name = "sonarqube_issues",
base_formula = "sonarqube_issues ~ 1 + high_debt_version + (1 | session)",
base_priors = c(
prior(normal(0, 1), class = "b"),
prior(normal(1.5, 1), class = "Intercept"),
prior(exponential(1), class = "sd"),
prior(gamma(0.01, 0.01), class = "shape")
),
family = negbinomial(),
data = d.both_completed,
base_control = list(adapt_delta = 0.98)
)
prior_summary(sonarqube_issues.with(only_priors= TRUE))
prior_summary(sonarqube_issues.with(sample_prior = "only"))
pp_check(sonarqube_issues.with(sample_prior = "only"), nsamples = 400, type = "bars") + xlim(-1, 15)
We choose a beta prior that allows for large effects (+-10 issues) but is skeptical to any effects larger than +-4 issues.
sim.size <- 1000
sim.intercept <- rnorm(sim.size, 1.5, 1)
sim.beta <- rnorm(sim.size, 0, 1)
sim.beta.diff <- exp(sim.intercept + sim.beta) - exp(sim.intercept)
sim.beta.diff.min <- sim.beta.diff
data.frame(x = sim.beta.diff.min) %>%
ggplot(aes(x)) +
geom_density() +
xlim(-15, 15) +
labs(
title = "Beta parameter prior influence",
x = "Issues difference",
y = "Density"
)
We check the posterior distribution and can see that the model seems to have been able to fit the data well Sampling seems to also have worked well as Rhat values are close to 1 and the sampling plots look nice.
pp_check(sonarqube_issues.with(), nsamples = 200, type = "bars") + xlim(-1, 15)
summary(sonarqube_issues.with())
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: sonarqube_issues ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(data) (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.49 0.35 0.02 1.28 1.00 1081 1610
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 0.74 0.33 0.07 1.40 1.00 2981
## high_debt_versionfalse -0.66 0.43 -1.48 0.20 1.00 6236
## Tail_ESS
## Intercept 2646
## high_debt_versionfalse 2933
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 1.20 3.03 0.32 4.35 1.00 1280 913
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
plot(sonarqube_issues.with(), ask = FALSE)
# default prior for monotonic predictor
edlvl_prior <- prior(dirichlet(2), class = "simo", coef = "moeducation_level1")
loo_result <- loo(
# Benchmark model(s)
sonarqube_issues.with(),
# New model(s)
sonarqube_issues.with("modified_lines"),
sonarqube_issues.with("work_domain"),
sonarqube_issues.with("work_experience_programming.s"),
sonarqube_issues.with("work_experience_java.s"),
sonarqube_issues.with("education_field"),
sonarqube_issues.with("mo(education_level)", edlvl_prior),
sonarqube_issues.with("workplace_peer_review"),
sonarqube_issues.with("workplace_td_tracking"),
sonarqube_issues.with("workplace_pair_programming"),
sonarqube_issues.with("workplace_coding_standards"),
sonarqube_issues.with("scenario"),
sonarqube_issues.with("group")
)
loo_result[2]
## $diffs
## elpd_diff se_diff
## sonarqube_issues.with("group") 0.0 0.0
## sonarqube_issues.with("work_domain") -0.7 2.1
## sonarqube_issues.with() -1.0 1.4
## sonarqube_issues.with("workplace_coding_standards") -1.2 1.4
## sonarqube_issues.with("workplace_peer_review") -1.4 1.3
## sonarqube_issues.with("workplace_pair_programming") -1.5 1.4
## sonarqube_issues.with("mo(education_level)", edlvl_prior) -1.5 1.4
## sonarqube_issues.with("scenario") -1.7 1.9
## sonarqube_issues.with("workplace_td_tracking") -1.7 1.5
## sonarqube_issues.with("education_field") -2.0 1.6
## sonarqube_issues.with("modified_lines") -2.1 1.5
## sonarqube_issues.with("work_experience_programming.s") -2.3 1.6
## sonarqube_issues.with("work_experience_java.s") -2.4 1.4
loo_result[1]
## $loos
## $loos$`sonarqube_issues.with()`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -77.8 7.8
## p_loo 6.6 1.2
## looic 155.6 15.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 27 61.4% 687
## (0.5, 0.7] (ok) 14 31.8% 203
## (0.7, 1] (bad) 3 6.8% 664
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("modified_lines")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.9 7.9
## p_loo 7.6 1.4
## looic 157.8 15.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 27 61.4% 595
## (0.5, 0.7] (ok) 12 27.3% 417
## (0.7, 1] (bad) 5 11.4% 41
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("work_domain")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -77.5 7.6
## p_loo 8.6 1.1
## looic 155.0 15.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 30 68.2% 1394
## (0.5, 0.7] (ok) 12 27.3% 292
## (0.7, 1] (bad) 2 4.5% 276
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("work_experience_programming.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -79.1 8.3
## p_loo 8.1 1.9
## looic 158.3 16.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 28 63.6% 1016
## (0.5, 0.7] (ok) 12 27.3% 317
## (0.7, 1] (bad) 3 6.8% 62
## (1, Inf) (very bad) 1 2.3% 38
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("work_experience_java.s")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -79.2 8.2
## p_loo 8.0 1.8
## looic 158.5 16.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 29 65.9% 340
## (0.5, 0.7] (ok) 10 22.7% 66
## (0.7, 1] (bad) 5 11.4% 28
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("education_field")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.8 8.0
## p_loo 7.6 1.3
## looic 157.6 16.0
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 29 65.9% 716
## (0.5, 0.7] (ok) 12 27.3% 107
## (0.7, 1] (bad) 3 6.8% 421
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("mo(education_level)", edlvl_prior)`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.3 7.8
## p_loo 7.2 1.2
## looic 156.7 15.6
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 28 63.6% 969
## (0.5, 0.7] (ok) 13 29.5% 322
## (0.7, 1] (bad) 3 6.8% 89
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("workplace_peer_review")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.3 7.9
## p_loo 7.1 1.3
## looic 156.5 15.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 26 59.1% 654
## (0.5, 0.7] (ok) 17 38.6% 144
## (0.7, 1] (bad) 1 2.3% 2579
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("workplace_td_tracking")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.5 7.9
## p_loo 7.2 1.3
## looic 157.0 15.9
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 30 68.2% 662
## (0.5, 0.7] (ok) 11 25.0% 255
## (0.7, 1] (bad) 3 6.8% 75
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("workplace_pair_programming")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.3 7.9
## p_loo 7.6 1.3
## looic 156.6 15.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 29 65.9% 418
## (0.5, 0.7] (ok) 12 27.3% 48
## (0.7, 1] (bad) 3 6.8% 151
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("workplace_coding_standards")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.0 8.1
## p_loo 8.0 1.6
## looic 156.0 16.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 31 70.5% 566
## (0.5, 0.7] (ok) 7 15.9% 112
## (0.7, 1] (bad) 6 13.6% 109
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("scenario")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -78.5 8.2
## p_loo 8.3 1.7
## looic 157.0 16.3
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 25 56.8% 601
## (0.5, 0.7] (ok) 13 29.5% 109
## (0.7, 1] (bad) 6 13.6% 50
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## $loos$`sonarqube_issues.with("group")`
##
## Computed from 4000 by 44 log-likelihood matrix
##
## Estimate SE
## elpd_loo -76.8 7.7
## p_loo 7.7 1.2
## looic 153.7 15.4
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 28 63.6% 1154
## (0.5, 0.7] (ok) 13 29.5% 364
## (0.7, 1] (bad) 3 6.8% 207
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
We inspect some of our top performing models.
All models seems to have sampled nicely (rhat = 1 and fluffy plots) they also have about the same fit to the data and similar estimates for the high_debt_version beta parameter.
We select the simplest model as a baseline.
sonarqube_issues0 <- brm(
"sonarqube_issues ~ 1 + high_debt_version + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(1.5, 1), class = "Intercept"),
prior(exponential(1), class = "sd"),
prior(gamma(0.01, 0.01), class = "shape")
),
family = negbinomial(),
data = d.both_completed,
control = list(adapt_delta = 0.97),
file = "fits/sonarqube_issues0",
file_refit = "on_change",
seed = 20210421
)
summary(sonarqube_issues0)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: sonarqube_issues ~ 1 + high_debt_version + (1 | session)
## Data: d.both_completed (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.49 0.35 0.02 1.27 1.00 746 1243
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 0.74 0.32 0.12 1.40 1.00 2608
## high_debt_versionfalse -0.65 0.44 -1.51 0.23 1.00 4220
## Tail_ESS
## Intercept 2464
## high_debt_versionfalse 2595
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 1.08 1.66 0.31 3.43 1.00 1193 993
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(sonarqube_issues0)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.015426533 0.4532342 -0.9601534 0.9665391
## 6033d90a5af2c702367b3a96 -0.182111985 0.5247973 -1.4499718 0.7352185
## 6034fc165af2c702367b3a98 0.423284174 0.5469530 -0.3145141 1.7514600
## 603500725af2c702367b3a99 -0.151292373 0.5266088 -1.4593530 0.7825570
## 603f97625af2c702367b3a9d 0.434497996 0.5543786 -0.3000722 1.7628220
## 603fd5d95af2c702367b3a9e -0.043145124 0.4814010 -1.1429757 1.0077323
## 60409b7b5af2c702367b3a9f 0.223344768 0.4964740 -0.5677460 1.4698325
## 604b82b5a7718fbed181b336 0.374030701 0.5312487 -0.3717971 1.6742650
## 6050c1bf856f36729d2e5218 -0.138884597 0.5114334 -1.3611878 0.7969012
## 6050e1e7856f36729d2e5219 0.069333409 0.4480372 -0.7984438 1.1141293
## 6055fdc6856f36729d2e521b 0.132388707 0.4529984 -0.6950727 1.1976780
## 60589862856f36729d2e521f -0.291359806 0.6137007 -1.9097217 0.6457119
## 605afa3a856f36729d2e5222 -0.291471601 0.6287735 -1.9157843 0.6660266
## 605c8bc6856f36729d2e5223 -0.003762474 0.4555736 -0.9791205 1.0302080
## 605f3f2d856f36729d2e5224 0.090931779 0.4436929 -0.7612083 1.1819968
## 605f46c3856f36729d2e5225 -0.287443157 0.6158478 -1.8847050 0.6370332
## 60605337856f36729d2e5226 -0.287239287 0.6356421 -1.8549045 0.7020783
## 60609ae6856f36729d2e5228 -0.074123451 0.4691407 -1.1307098 0.8682720
## 6061ce91856f36729d2e522e -0.149444875 0.5170214 -1.4375312 0.8059823
## 6061f106856f36729d2e5231 -0.298277320 0.6191952 -1.8935122 0.6283760
## 6068ea9f856f36729d2e523e -0.029443952 0.4563865 -1.0217985 0.9418743
## 6075ab05856f36729d2e5247 0.054512901 0.4567384 -0.8394841 1.0704043
plot(sonarqube_issues0, ask = FALSE)
pp_check(sonarqube_issues0, nsamples = 200, type = "bars") + xlim(-1, 15)
We select the best performing model with one variable.
sonarqube_issues1 <- brm(
"sonarqube_issues ~ 1 + high_debt_version + (1 | session) + group",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(1.5, 1), class = "Intercept"),
prior(exponential(1), class = "sd"),
prior(gamma(0.01, 0.01), class = "shape")
),
family = negbinomial(),
data = d.both_completed,
control = list(adapt_delta = 0.97),
file = "fits/sonarqube_issues1",
file_refit = "on_change",
seed = 20210421
)
summary(sonarqube_issues1)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: sonarqube_issues ~ 1 + high_debt_version + (1 | session) + group
## Data: d.both_completed (Number of observations: 44)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 22)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.44 0.32 0.02 1.17 1.00 1174 1650
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 0.29 0.44 -0.55 1.17 1.00 4454
## high_debt_versionfalse -0.67 0.42 -1.50 0.16 1.00 6103
## groupconsultants 0.76 0.59 -0.39 1.90 1.00 4848
## groupfriends 0.07 0.58 -1.08 1.19 1.00 4771
## groupprofessionalMcontact -0.57 0.87 -2.26 1.09 1.00 7240
## groupstudents 0.84 0.52 -0.22 1.88 1.00 4688
## Tail_ESS
## Intercept 2958
## high_debt_versionfalse 2733
## groupconsultants 3523
## groupfriends 3096
## groupprofessionalMcontact 3064
## groupstudents 3145
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 1.39 3.46 0.34 5.01 1.00 1534 1168
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(sonarqube_issues1)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033d69a5af2c702367b3a95 -0.081304183 0.4154695 -1.0507785 0.7185234
## 6033d90a5af2c702367b3a96 -0.207906216 0.5102610 -1.4961468 0.6337217
## 6034fc165af2c702367b3a98 0.285987844 0.4863475 -0.4684298 1.4921308
## 603500725af2c702367b3a99 -0.184409027 0.4879314 -1.3795912 0.6398960
## 603f97625af2c702367b3a9d 0.299337913 0.4848486 -0.3827799 1.5155103
## 603fd5d95af2c702367b3a9e -0.103707031 0.4516897 -1.2147470 0.7275142
## 60409b7b5af2c702367b3a9f 0.108857629 0.4217302 -0.6752135 1.1723715
## 604b82b5a7718fbed181b336 0.247295591 0.4689723 -0.4925896 1.4309938
## 6050c1bf856f36729d2e5218 -0.087359199 0.4740323 -1.1862427 0.8285805
## 6050e1e7856f36729d2e5219 0.147007605 0.4636384 -0.6660956 1.2719775
## 6055fdc6856f36729d2e521b 0.187719395 0.4738022 -0.6160901 1.3604608
## 60589862856f36729d2e521f -0.224264352 0.5592673 -1.6365107 0.6419202
## 605afa3a856f36729d2e5222 -0.167676956 0.5259302 -1.4880422 0.7405240
## 605c8bc6856f36729d2e5223 0.063002012 0.4234574 -0.8043535 1.0550375
## 605f3f2d856f36729d2e5224 0.193235530 0.4475086 -0.5283976 1.2881588
## 605f46c3856f36729d2e5225 -0.222059466 0.5523469 -1.5863270 0.6798019
## 60605337856f36729d2e5226 -0.222071079 0.5373562 -1.5855955 0.6186339
## 60609ae6856f36729d2e5228 -0.009862025 0.4439996 -0.9824828 0.9317650
## 6061ce91856f36729d2e522e -0.081482112 0.4663235 -1.1329120 0.8743682
## 6061f106856f36729d2e5231 -0.234384020 0.5445859 -1.6376065 0.6042225
## 6068ea9f856f36729d2e523e -0.068876149 0.4360224 -1.0744567 0.8206025
## 6075ab05856f36729d2e5247 -0.029421582 0.4082587 -0.9089109 0.8547237
plot(sonarqube_issues1, ask = FALSE)
pp_check(sonarqube_issues1, nsamples = 200, type = "bars") + xlim(-1, 15)
All candidate models look nice, none is significantly better than the others, we will proceed the simplest model: sonarqube_issues0
We will try a few different variations of the selected candidate model.
Some participants did only complete one scenario. Those has been excluded from the initial dataset to improve sampling of the models. We do however want to use all data we can and will therefore try to fit the model with the complete dataset.
sonarqube_issues0.all <- brm(
"sonarqube_issues ~ 1 + high_debt_version + (1 | session)",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(1.5, 1), class = "Intercept"),
prior(exponential(1), class = "sd"),
prior(gamma(0.01, 0.01), class = "shape")
),
family = negbinomial(),
data = as.data.frame(d.completed),
control = list(adapt_delta = 0.97),
file = "fits/sonarqube_issues0.all",
file_refit = "on_change",
seed = 20210421
)
summary(sonarqube_issues0.all)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: sonarqube_issues ~ 1 + high_debt_version + (1 | session)
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.49 0.32 0.02 1.18 1.01 743 1851
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 0.72 0.31 0.11 1.34 1.00 2339
## high_debt_versionfalse -0.75 0.37 -1.48 -0.01 1.00 5947
## Tail_ESS
## Intercept 2521
## high_debt_versionfalse 3117
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 2.17 8.60 0.40 9.17 1.00 977 670
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(sonarqube_issues0.all)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 0.183040574 0.4889970 -0.6813311 1.3516260
## 6033d69a5af2c702367b3a95 -0.022249127 0.4556012 -0.9854840 0.9080399
## 6033d90a5af2c702367b3a96 -0.196944157 0.5188610 -1.4218062 0.7026811
## 6034fc165af2c702367b3a98 0.502413581 0.5656409 -0.2667781 1.8094462
## 603500725af2c702367b3a99 -0.156376146 0.4976038 -1.3184595 0.7737207
## 603f84f15af2c702367b3a9b 0.016367782 0.5051662 -1.0735628 1.1059288
## 603f97625af2c702367b3a9d 0.515408470 0.5721404 -0.2452590 1.8393040
## 603fd5d95af2c702367b3a9e -0.038504466 0.4519691 -1.0056673 0.8989632
## 60409b7b5af2c702367b3a9f 0.267869109 0.4855030 -0.5005796 1.4388090
## 604b82b5a7718fbed181b336 0.447229396 0.5358051 -0.2770527 1.6917425
## 604f1239a7718fbed181b33f -0.092856028 0.5027129 -1.2195323 0.9058201
## 6050c1bf856f36729d2e5218 -0.162251344 0.4868765 -1.3203645 0.7324989
## 6050e1e7856f36729d2e5219 0.095917928 0.4364640 -0.7396285 1.1013025
## 6055fdc6856f36729d2e521b 0.145725621 0.4525242 -0.7093635 1.1793200
## 60579f2a856f36729d2e521e -0.003263625 0.4961099 -1.0781715 1.0466290
## 60589862856f36729d2e521f -0.326339237 0.6118806 -1.9140853 0.5758993
## 605a30a7856f36729d2e5221 -0.159213909 0.5565989 -1.5253892 0.8176027
## 605afa3a856f36729d2e5222 -0.327880912 0.5971902 -1.8445310 0.5400549
## 605c8bc6856f36729d2e5223 -0.012246897 0.4433745 -0.9728114 0.9410496
## 605f3f2d856f36729d2e5224 0.127173044 0.4403228 -0.7162329 1.1216853
## 605f46c3856f36729d2e5225 -0.322431071 0.5804321 -1.7840608 0.5545071
## 60605337856f36729d2e5226 -0.321573439 0.5982842 -1.8468108 0.6160728
## 60609ae6856f36729d2e5228 -0.088924633 0.4779361 -1.1629988 0.8648741
## 6061ce91856f36729d2e522e -0.158527485 0.5030132 -1.3385997 0.7616958
## 6061f106856f36729d2e5231 -0.331280152 0.5782342 -1.7966120 0.5220248
## 60672faa856f36729d2e523c 0.003455516 0.4977603 -1.0466032 1.0487368
## 6068ea9f856f36729d2e523e -0.014497613 0.4434550 -0.9308466 0.9263245
## 606db69d856f36729d2e5243 -0.155764875 0.5681107 -1.4736105 0.8840138
## 6075ab05856f36729d2e5247 0.069091587 0.4502341 -0.8264055 1.1139903
plot(sonarqube_issues0.all, ask = FALSE)
pp_check(sonarqube_issues0.all, nsamples = 200, type = "bars") + xlim(-1, 15)
As including all data points didn’t harm the model we will create this variant with all data points as well.
This variation includes work_experience_programming.s predictors as it can give further insight into how experience play a factor in the effect we try to measure. This is especially important as our sampling shewed towards containing less experienced developer than the population at large.
sonarqube_issues0.all.exp <- brm(
"sonarqube_issues ~ 1 + high_debt_version + (1 | session) + work_experience_programming.s",
prior = c(
prior(normal(0, 1), class = "b"),
prior(normal(1.5, 1), class = "Intercept"),
prior(exponential(1), class = "sd"),
prior(gamma(0.01, 0.01), class = "shape")
),
family = negbinomial(),
data = as.data.frame(d.completed),
control = list(adapt_delta = 0.99),
file = "fits/sonarqube_issues0.all.exp",
file_refit = "on_change",
seed = 20210421
)
summary(sonarqube_issues0.all.exp)
## Family: negbinomial
## Links: mu = log; shape = identity
## Formula: sonarqube_issues ~ 1 + high_debt_version + (1 | session) + work_experience_programming.s
## Data: as.data.frame(d.completed) (Number of observations: 51)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~session (Number of levels: 29)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.47 0.32 0.02 1.16 1.01 781 1359
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept 0.73 0.30 0.15 1.32 1.00
## high_debt_versionfalse -0.80 0.37 -1.53 -0.08 1.00
## work_experience_programming.s -0.23 0.23 -0.68 0.23 1.00
## Bulk_ESS Tail_ESS
## Intercept 2420 2532
## high_debt_versionfalse 5619 3043
## work_experience_programming.s 3615 2923
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape 1.42 2.55 0.40 4.90 1.00 1159 1073
##
## Samples were drawn using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
ranef(sonarqube_issues0.all.exp)
## $session
## , , Intercept
##
## Estimate Est.Error Q2.5 Q97.5
## 6033c6fc5af2c702367b3a93 0.150231302 0.4893809 -0.7384628 1.3181015
## 6033d69a5af2c702367b3a95 -0.027721224 0.4465065 -0.9919532 0.9511171
## 6033d90a5af2c702367b3a96 -0.212867421 0.5167692 -1.4688272 0.6795067
## 6034fc165af2c702367b3a98 0.441811811 0.5364383 -0.2704206 1.7214052
## 603500725af2c702367b3a99 -0.166824801 0.4953391 -1.3589010 0.6924000
## 603f84f15af2c702367b3a9b -0.005408170 0.5000125 -1.1090082 1.0872495
## 603f97625af2c702367b3a9d 0.450718850 0.5441883 -0.2732800 1.7426025
## 603fd5d95af2c702367b3a9e -0.050682887 0.4607171 -1.1169910 0.8997919
## 60409b7b5af2c702367b3a9f 0.209953510 0.4697568 -0.5563558 1.3750000
## 604b82b5a7718fbed181b336 0.386380117 0.5118369 -0.3290899 1.6210893
## 604f1239a7718fbed181b33f -0.097352265 0.5259049 -1.3641330 0.8918844
## 6050c1bf856f36729d2e5218 -0.125149604 0.4858464 -1.2585625 0.7958516
## 6050e1e7856f36729d2e5219 0.081740545 0.4238045 -0.7271722 1.0743633
## 6055fdc6856f36729d2e521b 0.124592900 0.4369941 -0.6907562 1.1548175
## 60579f2a856f36729d2e521e 0.012462978 0.4872208 -1.0209262 1.0699733
## 60589862856f36729d2e521f -0.240040820 0.5723541 -1.6974480 0.6853926
## 605a30a7856f36729d2e5221 -0.145676166 0.5365815 -1.5214890 0.8356764
## 605afa3a856f36729d2e5222 -0.251102645 0.5557672 -1.5989025 0.6822767
## 605c8bc6856f36729d2e5223 0.005569367 0.4363755 -0.9154745 0.9611533
## 605f3f2d856f36729d2e5224 0.338293165 0.6121759 -0.5680731 1.8387200
## 605f46c3856f36729d2e5225 -0.300629687 0.5846514 -1.7749187 0.5449995
## 60605337856f36729d2e5226 -0.308509428 0.5801745 -1.7677882 0.5886754
## 60609ae6856f36729d2e5228 -0.109431355 0.4635303 -1.1715705 0.7831926
## 6061ce91856f36729d2e522e -0.163819830 0.5219103 -1.4126383 0.8137790
## 6061f106856f36729d2e5231 -0.303289256 0.5709239 -1.7602297 0.6000055
## 60672faa856f36729d2e523c 0.007519247 0.4901510 -1.0453803 1.0641403
## 6068ea9f856f36729d2e523e -0.012802872 0.4535053 -0.9922452 0.9685848
## 606db69d856f36729d2e5243 -0.121847161 0.5310919 -1.3890695 0.8916657
## 6075ab05856f36729d2e5247 0.042883483 0.4196267 -0.7868627 0.9914372
loo(
sonarqube_issues0.all,
sonarqube_issues0.all.exp
)
## Output of model 'sonarqube_issues0.all':
##
## Computed from 4000 by 51 log-likelihood matrix
##
## Estimate SE
## elpd_loo -88.0 8.1
## p_loo 8.3 1.6
## looic 176.1 16.2
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 31 60.8% 476
## (0.5, 0.7] (ok) 16 31.4% 190
## (0.7, 1] (bad) 4 7.8% 129
## (1, Inf) (very bad) 0 0.0% <NA>
## See help('pareto-k-diagnostic') for details.
##
## Output of model 'sonarqube_issues0.all.exp':
##
## Computed from 4000 by 51 log-likelihood matrix
##
## Estimate SE
## elpd_loo -89.6 8.9
## p_loo 9.9 2.7
## looic 179.2 17.8
## ------
## Monte Carlo SE of elpd_loo is NA.
##
## Pareto k diagnostic values:
## Count Pct. Min. n_eff
## (-Inf, 0.5] (good) 34 66.7% 704
## (0.5, 0.7] (ok) 13 25.5% 315
## (0.7, 1] (bad) 3 5.9% 130
## (1, Inf) (very bad) 1 2.0% 10
## See help('pareto-k-diagnostic') for details.
##
## Model comparisons:
## elpd_diff se_diff
## sonarqube_issues0.all 0.0 0.0
## sonarqube_issues0.all.exp -1.6 2.6
plot(sonarqube_issues0.all.exp, ask = FALSE)
pp_check(sonarqube_issues0.all.exp, nsamples = 200, type = "bars") + xlim(-1, 15)
This means that our final model, with all data points and experience predictors, is sonarqube_issues0.all.exp
To begin interpreting the model we look at how it’s parameters were estimated. As our research is focused on how the outcome of the model is effected we will mainly analyze the \(\beta\) parameters.
mcmc_areas(sonarqube_issues0.all.exp, pars = c("b_high_debt_versionfalse", "b_work_experience_programming.s"), prob = 0.95) + scale_y_discrete() +
scale_y_discrete(labels=c("High debt version: false", "Professional programming experience")) +
ggtitle("Beta parameters densities in sonarqube issues model", subtitle = "Shaded region marks 95% of the density. Line marks the median")
scale_programming_experience <- function(x) {
(x - mean(d.completed$work_experience_programming))/ sd(d.completed$work_experience_programming)
}
unscale_programming_experience <- function(x) {
x * sd(d.completed$work_experience_programming) + mean(d.completed$work_experience_programming)
}
post_settings <- expand.grid(
high_debt_version = c("false", "true"),
session = NA,
work_experience_programming.s = sapply(c(0, 3, 10, 25, 40), scale_programming_experience)
)
post <- posterior_predict(sonarqube_issues0.all.exp, newdata = post_settings) %>%
melt(value.name = "estimate", varnames = c("sample_number", "settings_id")) %>%
left_join(
rowid_to_column(post_settings, var= "settings_id"),
by = "settings_id"
) %>%
mutate(work_experience_programming = unscale_programming_experience(work_experience_programming.s)) %>%
select(
estimate,
high_debt_version,
work_experience_programming
)%>%
mutate(estimate = estimate)
ggplot(post, aes(x=estimate, fill = high_debt_version)) +
geom_bar(position = "dodge2") +
scale_fill_manual(
name = "Debt version",
labels = c("Low debt", "High debt"),
values = c("lightblue", "darkblue")
) +
facet_grid(rows = vars(work_experience_programming)) +
labs(
title = "SonarQube issues introduced / years of programming experience",
subtitle = "Estimated for five different experience levels",
x = "Issued introduced",
y = "Incidence rate"
) +
xlim(-1, 10) +
scale_x_continuous(limits = c(-1,7), breaks = c(0,1,2,3,4,5,6,7), labels = c("0","1","2","3","4","5","6","7")) +
scale_y_continuous(limits = NULL, breaks = c(500,1500,2500), labels = c("10%","30%","50%")) +
theme(legend.position = "top")
post.diff <- post %>% filter(high_debt_version == "true")
post.diff$estimate = post.diff$estimate - filter(post, high_debt_version == "false")$estimate
ggplot(post.diff, aes(x=estimate)) +
geom_boxplot(quantile_lines = TRUE, quantile_fun = hdi, vline_linetype = 2) +
xlim(-7, 7) +
facet_grid(rows = vars(work_experience_programming)) +
labs(
title = "SonarQube issues introduced difference / years of programming experience",
subtitle = "Difference as: high debt issues - low debt issues",
x = "Issues # difference"
) +
scale_y_continuous(breaks = NULL)
We can then proceed to calculate some likelihoods:
d <- post
d.high <- d %>% filter(high_debt_version == "true") %>% pull(estimate)
d.low <- d %>% filter(high_debt_version == "false") %>% pull(estimate)
x <- sum(d.high) / sum(d.low)
x
## [1] 2.180106
Given all the simulated cases we find that they introduce 118% more issues in the high debt version.
d <- post %>% filter(work_experience_programming == 10)
d.high <- d %>% filter(high_debt_version == "true") %>% pull(estimate)
d.low <- d %>% filter(high_debt_version == "false") %>% pull(estimate)
x <- sum(d.high) / sum(d.low)
x
## [1] 2.170288
Considering developers with 10 years of professional programming experience we find that they introduce 117% more issues in the high debt version.
d <- post %>% filter(work_experience_programming == 0)
d.high <- d %>% filter(high_debt_version == "true") %>% pull(estimate)
d.low <- d %>% filter(high_debt_version == "false") %>% pull(estimate)
x <- sum(d.high) / sum(d.low)
x
## [1] 2.128516
Considering developers with no of professional programming experience we find that they introduce 113% more issues in the high debt version.
d <- post %>% filter(work_experience_programming == 25)
d.high <- d %>% filter(high_debt_version == "true") %>% pull(estimate)
d.low <- d %>% filter(high_debt_version == "false") %>% pull(estimate)
x <- sum(d.high) / sum(d.low)
x
## [1] 2.204776
Considering developers with 25 years of professional programming experience we find that they introduce 120% more issues in the high debt version.